import pandas as pd
import pprint
from plotly.offline import iplot, init_notebook_mode
import plotly.graph_objs as go
import plotly.express as px
import plotly.subplots as subplots
import plotly.io as pio
pio.templates.default = 'plotly_dark'
pio.renderers.default = 'notebook+pdf'
from plotly import tools
#init_notebook_mode()
import os
# Make sure the image output directory exists. exist_ok=True replaces the
# separate exists()/mkdir() pair, so there is no window between the check and
# the creation, and re-running the cell is a no-op.
os.makedirs("q3_images", exist_ok=True)
# Utilities.
my_printer = pprint.PrettyPrinter(depth=None)


def get_nrow_ncol(lst, col):
    """Return ``[n_rows, n_cols]`` for laying out ``lst`` in ``col`` columns.

    The row count is rounded up, so a final, partially filled row is still
    counted (truncating would leave the trailing items without a grid cell).
    When ``len(lst)`` is an exact multiple of ``col`` the result is the same
    as plain division.
    """
    # Integer ceiling division; avoids float rounding and needs no import.
    n_rows = (len(lst) + col - 1) // col
    return [n_rows, col]
def style_df(df):
    """Return a styled preview of the first five rows of ``df``.

    The preview gets a pale-yellow header, verdana text, alternating
    light-blue/white row backgrounds and a pale-yellow hover highlight.
    """
    header_rule = {
        'selector': 'th',
        'props': [
            ('background', '#FFFEE3'),
            ('color', 'black'),
            ('font-family', 'verdana'),
        ],
    }
    cell_rule = {
        'selector': 'td',
        'props': [('font-family', 'verdana')],
    }
    odd_row_rule = {
        'selector': 'tr:nth-of-type(odd)',
        'props': [('background', '#ADD8E6')],
    }
    even_row_rule = {
        'selector': 'tr:nth-of-type(even)',
        'props': [('background', 'white')],
    }
    hover_rule = {
        'selector': 'tr:hover',
        'props': [('background-color', '#FFFEE3')],
    }

    preview = df.head(5)
    table_styles = [
        header_rule,
        cell_rule,
        odd_row_rule,
        even_row_rule,
        hover_rule,
    ]
    return preview.style.set_table_styles(table_styles)
# Show every row when a frame is displayed. The fully qualified option name is
# used on purpose: set_option treats its argument as a pattern, and the bare
# 'max_rows' is ambiguous in pandas versions that also define
# 'styler.render.max_rows', which makes set_option raise OptionError.
pd.set_option('display.max_rows', None)
# Load the state-wise records and preview the first rows.
states_df = pd.read_csv(r'states.csv')
style_df(states_df)
| | Date | State | Confirmed | Recovered | Deceased | Other | Tested |
|---|---|---|---|---|---|---|---|
| 0 | 2020-01-30 | Kerala | 1 | 0 | 0 | 0 | nan |
| 1 | 2020-01-30 | India | 1 | 0 | 0 | 0 | nan |
| 2 | 2020-02-02 | Kerala | 2 | 0 | 0 | 0 | nan |
| 3 | 2020-02-02 | India | 2 | 0 | 0 | 0 | nan |
| 4 | 2020-02-03 | Kerala | 3 | 0 | 0 | 0 | nan |
# Keep only the columns needed for the digit analysis.
states_df = states_df.filter(['Date', 'State', 'Confirmed', 'Deceased'])
# Selecting the dates of interest (both bounds inclusive). The comparison is
# done on the date strings as read from the CSV.
in_window = states_df['Date'].between('2021-03-15', '2021-07-16')
# Removing India since it's not a state.
is_state = states_df['State'] != 'India'
# Take an explicit copy: new columns are added below, and assigning into a
# frame derived from a boolean mask can trigger SettingWithCopyWarning.
states_df = states_df[in_window & is_state].dropna().copy()
# Selecting the first and second digits.
for col in ['Confirmed', 'Deceased']:
    # Convert once per column. The integer cast keeps a float dtype (e.g.
    # '5.0') from leaking a '.' into the digit columns; the values are counts.
    digits = states_df[col].astype('int64').astype(str)
    states_df[f'First Digit of {col}'] = digits.str[0]
    # Single-digit values have no second character and get NaN here.
    states_df[f'Second Digit of {col}'] = digits.str[1]
style_df(states_df)
| | Date | State | Confirmed | Deceased | First Digit of Confirmed | Second Digit of Confirmed | First Digit of Deceased | Second Digit of Deceased |
|---|---|---|---|---|---|---|---|---|
| 13128 | 2021-03-15 | Andaman and Nicobar Islands | 5031 | 62 | 5 | 0 | 6 | 2 |
| 13129 | 2021-03-15 | Andhra Pradesh | 892008 | 7185 | 8 | 9 | 7 | 1 |
| 13130 | 2021-03-15 | Arunachal Pradesh | 16840 | 56 | 1 | 6 | 5 | 6 |
| 13131 | 2021-03-15 | Assam | 217817 | 1099 | 2 | 1 | 1 | 0 |
| 13132 | 2021-03-15 | Bihar | 263051 | 1552 | 2 | 6 | 1 | 5 |
# Distinct state names, plus the columns from position 4 onward (the digit
# columns appended to the frame earlier).
states = states_df['State'].unique()
cols = list(states_df.columns[4:])
# Grid shape for showing all states three per row; display the row count.
a, b = get_nrow_ncol(states, 3)
a
12
class plotly_plots:
    """Plotly bar-chart helpers for nested digit-count dictionaries.

    The plotting methods take a dict of the form
    ``{trace_name: {digit: count}}`` and draw one bar trace per
    ``trace_name``.
    """

    def __init__(self):
        self.img_bytes = None

    def get_layout(self, title):
        """Return the layout dict for a single grouped bar plot.

        ``title`` is used as the plot title text; the x axis is labelled
        'Digit' with ticks 0-9 and the y axis is labelled 'Count'.
        """
        return {
            'title': {
                'text': title,
                'x': 0.4,
                'y': 0.9,
                'xanchor': 'center',
                'yanchor': 'bottom',
            },
            'xaxis': {
                'title': 'Digit',
                'tickvals': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
            },
            'yaxis': {
                'title': 'Count',
            },
            'width': 800,
            'height': 400,
            'barmode': 'group',
        }

    def get_trace(self, Dict):
        """Return one ``go.Bar`` per entry of ``Dict``.

        Each value of ``Dict`` is itself a dict whose keys become the bar
        x positions and whose values become the bar heights; the outer key
        is used as the trace name.
        """
        return [
            go.Bar(x=list(counts.keys()), y=list(counts.values()), name=name)
            for name, counts in Dict.items()
        ]

    def draw_bar_for_state(self, state, Dict):
        """Show a grouped bar chart for one state.

        ``state`` is used as the plot title; ``Dict`` is a dict of
        dictionaries of first- and second-digit counts.
        """
        fig = go.Figure(data=self.get_trace(Dict),
                        layout=self.get_layout(state))
        fig.show()

    ##########################################################################
    ##### Don't use what is below now. Under Construction. ###################
    ##########################################################################
    def update_pos_row(self, old_pos, ncols):
        """Return the grid position, wrapped to the next row if needed.

        ``old_pos`` is ``[row, col]``. When the column has run past
        ``ncols`` the result moves to column 1 of the following row.
        A new list is returned; ``old_pos`` itself is left untouched.
        """
        new_pos = list(old_pos)
        if new_pos[-1] > ncols:
            new_pos[0] = old_pos[0] + 1
            new_pos[-1] = 1
        return new_pos

    def add_trace_to_fig(self, fig, trace_list, pos):
        """Add every trace in ``trace_list`` to subplot ``pos`` of ``fig``.

        ``pos`` is a ``(row, col)`` pair; the same ``fig`` is returned.
        """
        row, col = pos
        for trace in trace_list:
            fig.add_trace(trace, row=row, col=col)
        return fig

    def draw_bar_for_all_states(self, DICT, cols):
        """Show one subplot per state, arranged in ``cols`` columns.

        ``DICT`` maps each state name to the dict accepted by
        ``get_trace``.
        """
        # Getting a list of all states.
        states = list(DICT)
        ncols = cols
        # Round up so a last, partially filled row still gets subplots;
        # truncating would send the trailing states to a row that does
        # not exist.
        nrows = (len(states) + ncols - 1) // ncols
        fig = subplots.make_subplots(rows=nrows,
                                     cols=ncols,
                                     subplot_titles=tuple(states),
                                     shared_yaxes=True)
        pos = [1, 1]
        # Iterating through states, filling the grid row by row.
        for state in states:
            pos = self.update_pos_row(pos, ncols)
            fig = self.add_trace_to_fig(fig, self.get_trace(DICT[state]), pos)
            pos[-1] += 1
        # The shared layout does not depend on the state, so apply it once
        # after the loop; the overall figure size is then overridden.
        fig.update_layout(self.get_layout(""))
        fig.update_layout(width=1000, height=2750)
        iplot(fig)
class second_wave_info(plotly_plots):
    """Per-state digit counts for a data frame, plus plots of them.

    The frame must have a 'State' column; every column from position 4
    onward is counted (presumably the first/second digit columns - verify
    against the frame that is passed in).
    """

    def __init__(self, data_frame):
        # Initialise the plotting base class so its attributes exist too.
        super().__init__()
        self.df = data_frame  # The whole data frame
        self.cols = list(self.df.columns)[4:]
        self.states = self.df['State'].unique()
        self.all_state_benford_count = {}

    def get_list(self, df, col):
        """Return the values of ``df[col]`` as a sorted list of ints."""
        # v == v is False only for NaN, so this drops float nan entries.
        values = [v for v in df[col].tolist() if v == v]
        return sorted(map(int, values))

    def get_all_state_benford_digit_count(self):
        """Count digit occurrences per state and per counted column.

        Returns (and stores in ``self.all_state_benford_count``) a dict of
        the form ``{state: {column: {digit: count}}}``, with the digits of
        each inner dict in ascending order.
        """
        from collections import Counter

        # Iterating through all states.
        for state in self.states:
            state_df = self.df[self.df['State'] == state]
            # Counter tallies in one pass; the list is already sorted, so
            # the resulting dict keeps the digits in ascending order.
            self.all_state_benford_count[state] = {
                col: dict(Counter(self.get_list(state_df, col)))
                for col in self.cols
            }
        return self.all_state_benford_count

    def get_hist_for_state(self, state, all_state):
        """Plot the digit counts of one state and/or of all states.

        ``state`` is a state name (falsy to skip the single-state plot);
        ``all_state`` is truthy to draw the combined plot as well.
        """
        # Compute the counts on first use, so plotting does not fail just
        # because get_all_state_benford_digit_count() was not called yet.
        if not self.all_state_benford_count:
            self.get_all_state_benford_digit_count()
        if state:  # Produces histogram of a state passed as a string 'state'.
            self.draw_bar_for_state(state, self.all_state_benford_count[state])
        ############################################################
        ##### Don't use what is below now. Under Construction #####
        ############################################################
        if all_state:  # Produces histogram for all states.
            self.draw_bar_for_all_states(self.all_state_benford_count, 2)
# Build the analysis object for the filtered frame and compute the digit
# counts of every state.
India = second_wave_info(states_df)
indian_states_benford_dict = India.get_all_state_benford_digit_count()
# Pretty-print the counts of one state as a spot check.
tamil_nadu_counts = indian_states_benford_dict['Tamil Nadu']
my_printer.pprint(tamil_nadu_counts)
{'First Digit of Confirmed': {1: 39, 2: 50, 8: 21, 9: 14},
'First Digit of Deceased': {1: 68, 2: 24, 3: 32},
'Second Digit of Confirmed': {0: 14,
1: 11,
2: 12,
3: 12,
4: 23,
5: 16,
6: 13,
7: 8,
8: 8,
9: 7},
'Second Digit of Deceased': {0: 7,
1: 8,
2: 43,
3: 29,
4: 9,
5: 6,
6: 5,
7: 6,
8: 6,
9: 5}}
# Draw the single-state bar chart for every state, one figure each; the
# combined all-states figure is skipped.
for state in states:
    India.get_hist_for_state(state=state, all_state=False)
# IPython shell escape (not plain Python): converts Benford_Law.ipynb with
# nbconvert's "plotlyhtml" exporter - presumably so the Plotly figures stay
# interactive in the exported HTML; TODO confirm.
!jupyter nbconvert --to plotlyhtml Benford_Law.ipynb
[NbConvertApp] Converting notebook Benford_Law.ipynb to plotlyhtml [NbConvertApp] Writing 606475 bytes to Benford_Law.html